import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import os
import numpy as np
from scipy.stats import linregress
import gmaps
import plotly.express as px
# Locations of the three input CSV files inside the Resources folder.
happiness_path = os.path.join("Resources", "HappinessAlcoholConsumption.csv")
income_path = os.path.join("Resources", "countries_income_group.csv")
country_path = os.path.join("Resources", "world_country_and_usa_states_latitude_and_longitude_values.csv")

# Read the happiness/alcohol table and the income-group table.
# The income file's first column is used as its index.
happiness_study_df = pd.read_csv(happiness_path)
income_study_df = pd.read_csv(income_path, index_col=[0])

# Coordinates table: keep only latitude/longitude/country and rename the key
# column to 'Country' so it matches the happiness table.
coordinates = (
    pd.read_csv(country_path)
    .loc[:, ['latitude', 'longitude', 'country']]
    .rename(columns={'country': 'Country'})
)

# Happiness data joined with coordinates (used later for the map).
df_lat = happiness_study_df.merge(coordinates, on='Country')

# The income table names its country column 'Economy'; align it with the
# happiness table before joining on it.
income_study_df = income_study_df.rename(columns={'Economy': 'Country'})
alcohol_study = happiness_study_df.merge(income_study_df, on="Country")

# Drop the columns that are not needed for the analysis and display the result.
final_alcohol = alcohol_study.drop(columns=['Code', 'Region_y'])
final_alcohol
| | Country | Region_x | Hemisphere | HappinessScore | HDI | GDP_PerCapita | Beer_PerCapita | Spirit_PerCapita | Wine_PerCapita | Income group |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Denmark | Western Europe | north | 7.526 | 928 | 53.579 | 224 | 81 | 278 | High income |
| 1 | Switzerland | Western Europe | north | 7.509 | 943 | 79.866 | 185 | 100 | 280 | High income |
| 2 | Iceland | Western Europe | north | 7.501 | 933 | 60.530 | 233 | 61 | 78 | High income |
| 3 | Norway | Western Europe | north | 7.498 | 951 | 70.890 | 169 | 71 | 129 | High income |
| 4 | Finland | Western Europe | north | 7.413 | 918 | 43.433 | 263 | 133 | 97 | High income |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 107 | Madagascar | Sub-Saharan Africa | south | 3.695 | 517 | 402.000 | 26 | 15 | 4 | Low income |
| 108 | Tanzania | Sub-Saharan Africa | south | 3.666 | 533 | 878.000 | 36 | 6 | 1 | Low income |
| 109 | Liberia | Sub-Saharan Africa | north | 3.622 | 432 | 455.000 | 19 | 152 | 2 | Low income |
| 110 | Benin | Sub-Saharan Africa | north | 3.484 | 512 | 789.000 | 34 | 4 | 13 | Low income |
| 111 | Togo | Sub-Saharan Africa | north | 3.303 | 500 | 577.000 | 36 | 2 | 19 | Low income |
112 rows × 10 columns
# Pick the five happiest countries by rank rather than by a hand-tuned score
# cutoff, so the selection stays at five rows if the data changes.
# nlargest returns the rows ordered from highest to lowest HappinessScore.
top5 = final_alcohol.nlargest(5, 'HappinessScore')
top5
| | Country | Region_x | Hemisphere | HappinessScore | HDI | GDP_PerCapita | Beer_PerCapita | Spirit_PerCapita | Wine_PerCapita | Income group |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Denmark | Western Europe | north | 7.526 | 928 | 53.579 | 224 | 81 | 278 | High income |
| 1 | Switzerland | Western Europe | north | 7.509 | 943 | 79.866 | 185 | 100 | 280 | High income |
| 2 | Iceland | Western Europe | north | 7.501 | 933 | 60.530 | 233 | 61 | 78 | High income |
| 3 | Norway | Western Europe | north | 7.498 | 951 | 70.890 | 169 | 71 | 129 | High income |
| 4 | Finland | Western Europe | north | 7.413 | 918 | 43.433 | 263 | 133 | 97 | High income |
# Series for the bar chart: one bar per country, bar height = happiness score.
countries = top5['Country']
scores = top5['HappinessScore']
bar_colors = ['#1053e3', '#1072e3', '#1092e3','#10abe3','#10bce3']
axis_label_style = {'fontsize': 15, 'labelpad': 35}

# Draw the bars, one colour per country.
plt.bar(countries, scores, color=bar_colors)

# Title, axis labels and tick-label styling.
plt.title('Top Five Happiest Countries', fontsize=25, loc='center', fontweight='bold')
plt.xlabel('Countries', **axis_label_style)
plt.ylabel('Happiness Score', **axis_label_style)
plt.xticks(fontsize=15, fontweight='bold')

# Restrict the y-axis to 7.35-7.55 (note: the axis does not start at zero).
plt.ylim(7.35, 7.55)

# Enlarge the current figure to 20x10 inches, then render it.
fig = plt.gcf()
fig.set_size_inches(20, 10)
plt.show()
# Pick the five least happy countries by rank rather than by a hand-tuned
# score cutoff, so the selection stays at five rows if the data changes.
# nsmallest already returns the rows in ascending HappinessScore order, so no
# separate sort is required; both names are kept for later cells.
bottom5 = final_alcohol.nsmallest(5, 'HappinessScore')
sortbottom = bottom5

# Variables for the graph: one bar per country, lowest score first.
x = sortbottom['Country']
y = sortbottom['HappinessScore']
color = ['#E3504D', '#E3754D', '#E39B4D','#E3B94D','#E3D74D']

# Plot the graph.
plt.bar(x, y, color=color)

# Labeling and styling for the graph.
plt.xticks(fontsize=15, fontweight='bold')
plt.xlabel('Countries', fontsize=15, labelpad=35)
plt.ylabel('Happiness Score', fontsize=15, labelpad=35)
plt.title('Bottom Five Least Happiest Countries',fontsize=25, loc='center', fontweight='bold')

# Restrict the y-axis to 3.0-3.8 (note: the axis does not start at zero).
plt.ylim(3.0, 3.8)

# Enlarge the current figure to 20x10 inches, then render it.
fig = plt.gcf()
fig.set_size_inches(20, 10)
plt.show()
# Mean per-capita consumption of each alcohol type across all countries.
beer, spirit, wine = (
    final_alcohol[column].mean()
    for column in ('Beer_PerCapita', 'Spirit_PerCapita', 'Wine_PerCapita')
)

# One-row summary table holding the three averages.
best_alcohol = pd.DataFrame([{
    "Average Beer Per Capita": beer,
    "Average Spirit Per Capita": spirit,
    "Average Wine Per Capita": wine,
}])
best_alcohol.head()
| | Average Beer Per Capita | Average Spirit Per Capita | Average Wine Per Capita |
|---|---|---|---|
| 0 | 141.258929 | 100.071429 | 70.276786 |
# Bar chart of the average per-capita consumption of each alcohol type.
alcohol = ["Beer", "Spirit", "Wine"]

# Use the means computed above (beer, spirit, wine) instead of numbers copied
# by hand from the printed table, so the chart follows the data. Rounded to
# two decimals, which reproduces the previously hard-coded values.
avg_pc = [round(value, 2) for value in (beer, spirit, wine)]

# One bar per alcohol type, positioned at 0, 1, 2 and labelled by name.
x_axis = np.arange(len(avg_pc))
plt.bar(x_axis, avg_pc, color = "g", alpha = .8, align= 'center')
plt.xticks(x_axis, alcohol,fontsize=15, fontweight='bold')

# Title and axis labels.
plt.title("Beer vs Wine vs Spirit per Capita",fontsize=25, loc='center', fontweight='bold')
plt.xlabel("Alcohol",fontsize=15, labelpad=35)
plt.ylabel("Average Alcohol Type Per Capita",fontsize=15, labelpad=35)

# Enlarge the current figure to 20x10 inches, then render it.
fig = plt.gcf()
fig.set_size_inches(20,10)
plt.show()
# Grouped bar chart: beer / spirit / wine consumption for the five happiest
# countries. Rank-based selection replaces the hand-tuned score cutoff.
top5 = final_alcohol.nlargest(5, 'HappinessScore')
x_axis = np.arange(len(top5))

# (column in the data, legend label) for each bar in a group.
drinks = [
    ('Beer_PerCapita', 'Beer'),
    ('Spirit_PerCapita', 'Spirits'),
    ('Wine_PerCapita', 'Wine'),
]
bar_width = 0.2

# Offsets of -0.2, 0 and +0.2 centre each three-bar group on its tick, so the
# country label sits under the middle of the group rather than off to one side.
for position, (column, label) in enumerate(drinks):
    offset = (position - (len(drinks) - 1) / 2) * bar_width
    plt.bar(x_axis + offset, top5[column], width=bar_width, label=label)

# Title, axis labels, country names on the ticks, and legend.
plt.title("Alcohol of Choice for Top Five",fontsize=25, loc='center', fontweight='bold')
plt.xlabel('Countries', fontsize=15, labelpad=35)
plt.ylabel('Alcohol Per Capita', fontsize=15, labelpad=35)
plt.xticks(x_axis, top5['Country'],fontsize=15, fontweight='bold')
plt.legend(fontsize=15)

# Enlarge the current figure to 20x10 inches, then render it.
fig = plt.gcf()
fig.set_size_inches(20,10)
plt.show()
# Grouped bar chart: beer / spirit / wine consumption for the five least happy
# countries. Rank-based selection replaces the hand-tuned score cutoff;
# nsmallest orders the rows from lowest score upward, the same ordering the
# bottom-five happiness chart uses.
bottom5 = final_alcohol.nsmallest(5, 'HappinessScore')
x_axis = np.arange(len(bottom5))

# (column in the data, legend label) for each bar in a group.
drinks = [
    ('Beer_PerCapita', 'Beer'),
    ('Spirit_PerCapita', 'Spirits'),
    ('Wine_PerCapita', 'Wine'),
]
bar_width = 0.2

# Offsets of -0.2, 0 and +0.2 centre each three-bar group on its tick, so the
# country label sits under the middle of the group rather than off to one side.
for position, (column, label) in enumerate(drinks):
    offset = (position - (len(drinks) - 1) / 2) * bar_width
    plt.bar(x_axis + offset, bottom5[column], width=bar_width, label=label)

# Title, axis labels, country names on the ticks, and legend.
plt.title("Alcohol of Choice for Bottom Five",fontsize=25, loc='center', fontweight='bold')
plt.xlabel('Countries', fontsize=15, labelpad=35)
plt.ylabel('Alcohol Per Capita', fontsize=15, labelpad=35)
plt.xticks(x_axis, bottom5['Country'],fontsize=15, fontweight='bold')
plt.legend(fontsize=15)

# Enlarge the current figure to 20x10 inches, then render it.
fig = plt.gcf()
fig.set_size_inches(20,10)
plt.show()
# Scatter of happiness score against income group (a categorical x-axis).
income_group = final_alcohol['Income group']
happiness = final_alcohol['HappinessScore']
axis_label_style = {'fontsize': 15, 'labelpad': 35}

# Large green markers with a black outline.
plt.scatter(income_group, happiness, s=500, edgecolor='k', facecolors='#32a852')

# Title, axis labels and tick-label styling.
plt.title('Income vs. Happiness Score',fontsize=25, loc='center', fontweight='bold')
plt.xlabel('Income Level', **axis_label_style)
plt.ylabel('Happiness Score', **axis_label_style)
plt.xticks(fontsize=15, fontweight='bold')

# Fix the y-axis range to 3-8.
plt.ylim(3, 8)

# Enlarge the current figure to 20x10 inches, then render it.
fig = plt.gcf()
fig.set_size_inches(20, 10)
plt.show()
# Calculate the correlation coefficient and linear regression model
# for GDP per Capita and HappinessScore.
#
# Columns are selected by label rather than by position (iloc), so the
# analysis cannot silently pick up the wrong column if the column order of
# final_alcohol changes.
#
# NOTE(review): the displayed table shows GDP_PerCapita values such as 53.579
# (Denmark) next to 402.000 (Madagascar), which looks like mixed scaling in
# the source data - confirm the units before interpreting the sign or size of
# this coefficient.
GDP_per = final_alcohol['GDP_PerCapita']
Happiness_score = final_alcohol['HappinessScore']
correlation = st.pearsonr(GDP_per,Happiness_score)
print(f"The correlation between GDP per Capita and Happiness Score is {round(correlation[0],2)}.")

# Calculate linear regression: happiness score on x, GDP per capita on y.
x_values = Happiness_score
y_values = GDP_per
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
regress_values = x_values * slope + intercept

# Scatter the observations, overlay the fitted line, and print its equation
# on the plot at data coordinates (5.5, 200).
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(5.5,200.0),fontsize=20,color="red",fontweight='bold')

# Add labels and title to plot
plt.xlabel("Happiness Score", fontsize=15, labelpad=35)
plt.ylabel("GDP per Capita", fontsize=15, labelpad=35)
plt.title('GDP per Capita vs Happiness Score',fontsize=25, loc='center', fontweight='bold')

# Enlarge the current figure to 20x10 inches, then render it.
fig = plt.gcf()
fig.set_size_inches(20,10)
plt.show()
The correlation between GDP per Capita and Happiness Score is -0.49.
# Correlation coefficient and linear regression for GDP per Capita and
# Beer Per Capita.
#
# Columns are selected by label rather than by position (iloc), so the
# analysis cannot silently pick up the wrong column if the column order of
# final_alcohol changes.
#
# NOTE(review): GDP_PerCapita appears to mix scales in the displayed table
# (e.g. 53.579 vs 402.000) - confirm the units before interpreting the result.
GDP_per = final_alcohol['GDP_PerCapita']
Beer_score = final_alcohol['Beer_PerCapita']
correlation = st.pearsonr(GDP_per,Beer_score)
print(f"The correlation between GDP per Capita and Beer Per Capita is {round(correlation[0],2)}.")

# Calculate linear regression: beer per capita on x, GDP per capita on y.
x_values = Beer_score
y_values = GDP_per
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
regress_values = x_values * slope + intercept

# Scatter the observations, overlay the fitted line, and print its equation
# on the plot at data coordinates (200, 200).
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(200.0,200.0),fontsize=20,color="red",fontweight='bold')

# Add labels and title to plot
plt.xlabel("Beer Per Capita", fontsize=15, labelpad=35)
plt.ylabel("GDP per Capita", fontsize=15, labelpad=35)
plt.title('GDP per Capita vs Beer Per Capita',fontsize=25, loc='center', fontweight='bold')

# Enlarge the current figure to 20x10 inches, then render it.
fig = plt.gcf()
fig.set_size_inches(20,10)
plt.show()
The correlation between GDP per Capita and Beer Per Capita is -0.4.
# Correlation coefficient and linear regression for GDP per Capita and
# Wine Per Capita.
#
# Columns are selected by label rather than by position (iloc), so the
# analysis cannot silently pick up the wrong column if the column order of
# final_alcohol changes.
#
# NOTE(review): GDP_PerCapita appears to mix scales in the displayed table
# (e.g. 53.579 vs 402.000) - confirm the units before interpreting the result.
GDP_per = final_alcohol['GDP_PerCapita']
Wine_score = final_alcohol['Wine_PerCapita']
correlation = st.pearsonr(GDP_per,Wine_score)
print(f"The correlation between GDP per Capita and Wine Per Capita is {round(correlation[0],2)}.")

# Calculate linear regression: wine per capita on x, GDP per capita on y.
x_values = Wine_score
y_values = GDP_per
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
regress_values = x_values * slope + intercept

# Scatter the observations, overlay the fitted line, and print its equation
# on the plot at data coordinates (200, 200).
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(200.0,200.0),fontsize=20,color="red",fontweight='bold')

# Add labels and title to plot
plt.xlabel("Wine Per Capita", fontsize=15, labelpad=35)
plt.ylabel("GDP per Capita", fontsize=15, labelpad=35)
plt.title('GDP per Capita vs Wine Per Capita',fontsize=25, loc='center', fontweight='bold')

# Enlarge the current figure to 20x10 inches, then render it.
fig = plt.gcf()
fig.set_size_inches(20,10)
plt.show()
The correlation between GDP per Capita and Wine Per Capita is -0.21.
# Correlation coefficient and linear regression for GDP per Capita and
# Spirit Per Capita.
#
# Columns are selected by label rather than by position (iloc), so the
# analysis cannot silently pick up the wrong column if the column order of
# final_alcohol changes.
#
# NOTE(review): GDP_PerCapita appears to mix scales in the displayed table
# (e.g. 53.579 vs 402.000) - confirm the units before interpreting the result.
GDP_per = final_alcohol['GDP_PerCapita']
Spirit_score = final_alcohol['Spirit_PerCapita']
correlation = st.pearsonr(GDP_per,Spirit_score)
print(f"The correlation between GDP per Capita and Spirit Per Capita is {round(correlation[0],2)}.")

# Calculate linear regression: spirit per capita on x, GDP per capita on y.
x_values = Spirit_score
y_values = GDP_per
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
regress_values = x_values * slope + intercept

# Scatter the observations, overlay the fitted line, and print its equation
# on the plot at data coordinates (200, 200).
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(200.0,200.0),fontsize=20,color="red",fontweight='bold')

# Add labels and title to plot
plt.xlabel("Spirit Per Capita", fontsize=15, labelpad=35)
plt.ylabel("GDP per Capita", fontsize=15, labelpad=35)
plt.title('GDP per Capita vs Spirit Per Capita',fontsize=25, loc='center', fontweight='bold')

# Enlarge the current figure to 20x10 inches, then render it.
fig = plt.gcf()
fig.set_size_inches(20,10)
plt.show()
The correlation between GDP per Capita and Spirit Per Capita is -0.28.
# World map shaded by happiness score. Countries are matched by their names
# in the 'Country' column, which is also shown as the hover title.
choropleth_options = {
    'locations': "Country",
    'color': "HappinessScore",
    'locationmode': 'country names',
    'hover_name': "Country",
    # Colour scale spans 0-8 regardless of the scores present in the data.
    'range_color': [0,8],
    'title': 'Country vs happiness score',
}
fig = px.choropleth(df_lat, **choropleth_options)
fig.show()